import numpy as np
from sklearn import cross_validation
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
from sklearn.cross_validation import cross_val_score

def load_data(dataset, n_positive=2083, n_negative=2917, seed=None):
    """Load a header-less CSV, append a binary label column and shuffle rows.

    The first ``n_positive`` rows of the file receive label ``1.0`` and the
    next ``n_negative`` rows receive label ``0.0``. The label is appended as
    the last column, then the rows are randomly permuted.

    Parameters
    ----------
    dataset : str or file-like
        Path (or open text buffer) handed straight to ``pandas.read_csv``;
        read as comma-separated values with no header row.
    n_positive : int, optional
        Number of leading rows labelled ``1.0``. Defaults to 2083.
    n_negative : int, optional
        Number of following rows labelled ``0.0``. Defaults to 2917.
    seed : int or None, optional
        Seed for the row shuffle. ``None`` (default) uses NumPy's global
        random state, so the order differs between runs.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per CSV row, in shuffled order; the last
        column holds the label.

    Raises
    ------
    ValueError
        If the number of rows in the file is not ``n_positive + n_negative``.
    """
    instance = pd.read_csv(dataset, sep=",", header=None)

    # Fail early with a readable message instead of an opaque shape error
    # from np.column_stack when the file and the label counts disagree.
    expected_rows = n_positive + n_negative
    if len(instance) != expected_rows:
        raise ValueError(
            "expected %d rows (%d positive + %d negative) but the data has %d"
            % (expected_rows, n_positive, n_negative, len(instance))
        )

    label = np.concatenate((np.ones(n_positive), np.zeros(n_negative)), axis=0)
    data = np.column_stack((instance.values, label))

    if seed is None:
        return np.random.permutation(data)
    # A private RandomState keeps seeded shuffles from touching global state.
    return np.random.RandomState(seed).permutation(data)

# Script entry: load the labelled data, then estimate random-forest accuracy
# with 10-fold cross-validation.
print("I am loading the data...")
dataPath = '8MAnd600.csv'
data = load_data(dataPath)

# load_data puts the label in the last column; everything before it is a feature.
X = data[:, :-1]
Y = data[:, -1]

# min_samples_split must be an integer >= 2 in scikit-learn 0.18 and later
# (1 raises ValueError). 2 is the smallest valid value and splits the same
# way, because a node holding a single sample cannot be split.
clf = RandomForestClassifier(
    n_estimators=10,
    max_depth=None,
    min_samples_split=2,
    random_state=0,
)
scores = cross_val_score(clf, X, Y, scoring='accuracy', cv=10)

# Report the mean fold accuracy with a +/- two-standard-deviation spread.
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))